%
% tutorial: http://www.iam.ubc.ca/~newbury/tex/bibtex.html
%

% LONG VERSION
% Long-form venue names, DISABLED.  A bare "Comment" command does not hide the
% string definition on the line after it from classic BibTeX, so each
% definition below is switched off by dropping its leading at-sign instead.
% To use the long forms, restore the at-sign on these lines and disable the
% SHORT VERSION definitions.
% string{vldb = "Proc. of Very Large Database (VLDB)"}
% string{icde = "Int. Conf. Data Engineering (ICDE)"}
% string{kdd = "Int'l Conf. on Knowledge Discovery and Data Mining (SIGKDD)"}
% string{sigmod = "Int'l Conf. Management of Data (SIGMOD)"}
% string{icdm = "Int'l Conf. Data Mining (ICDM)"}
% string{edbt = "Int'l Conf. on Extending Database Technology (EDBT)"}
% string{icml = "Int'l Conf. on Machine Learning (ICML)"}
% string{pods = "ACM Symposium on Principles of Database Systems (PODS)"}
% string{icdt = "International Conference on Database Theory (ICDT)"}
% string{focs = "IEEE Symposium on Foundations of Computer Science (FOCS)"}
% string{sdm = "SIAM Int'l Conf. on Data Mining (SDM)"}
% string{ijcai = "Int'l Joint Conf. on Artificial Intelligence"}

% SHORT VERSION
% Active venue macros; entries reference these unquoted (e.g. booktitle = kdd).
@string{vldb   = {VLDB}}
@string{icde   = {ICDE}}
@string{kdd    = {SIGKDD}}
@string{sigmod = {SIGMOD}}
@string{icdm   = {ICDM}}
@string{edbt   = {EDBT}}
@string{icml   = {ICML}}
@string{pods   = {PODS}}
@string{icdt   = {ICDT}}
@string{focs   = {FOCS}}
@string{sdm    = {SIAM Data Mining}}
@string{ijcai  = {IJCAI}}

% AAAAAAAAAAAAAA
@article{abbott-db,
  author  = "Alison Abbott",
  title   = "Bioinformatics institute plans public database for gene expression data",
  journal = "Nature",
  volume  = "398",
  pages   = "646",
  year    = "1999",
}



% "and others" is the BibTeX token for et al.; a literal "et al" inside a
% name is parsed as part of the surname.
@misc{expressml,
  author       = {Sudeshna Adak and Biplav Srivastava and others},
  title        = {e{X}press{ML} - A Common Data Representation for Organizing and Managing Annotations of Biochip Expression Data},
  howpublished = {http://biplav.com/bsc/expressml/expressml\_details.htm},
}

@inproceedings{aggarwal00,
  author    = {R. C. Agarwal and C. C. Aggarwal and V. V. V. Prasad},
  title     = {Depth First Generation of Long Patterns},
  booktitle = kdd,
  year      = 2000,
}

@inproceedings{proclus,
  author    = {C. C. Aggarwal and C. Procopiuc and J. Wolf and P. S. Yu and J. S. Park},
  title     = {Fast algorithms for projected clustering},
  booktitle = sigmod,
  year      = {1999},
}

@inproceedings{genproclus,
  author    = {C. C. Aggarwal and P. S. Yu},
  title     = {Finding generalized projected clusters in high dimensional spaces},
  booktitle = sigmod,
  year      = {2000},
  pages     = {70--81},
}

@inproceedings{clique,
  author    = {R. Agrawal and J. Gehrke and D. Gunopulos and P. Raghavan},
  title     = {Automatic subspace clustering of high dimensional data for data mining applications},
  booktitle = sigmod,
  year      = 1998,
}

% Venue is SIGMOD, as stated in this entry's own free-form "text" field.
@inproceedings{agrawal93mining,
  author    = {R. Agrawal and T. Imielinski and A. Swami},
  title     = {Mining association rules between sets of items in large databases},
  text      = {Rakesh Agrawal, Tomasz Imielinski, and Arun
               Swami. Mining association rules between sets of
               items in large databases. In Proc. of the ACM SIGMOD
               Conference on Management of Data, pages 207--216,
               Washington, D.C., May 1993.},
  booktitle = sigmod,
  pages     = {207--216},
  year      = 1993,
}

@inproceedings{sequential96,
  author    = {R. Srikant and R. Agrawal},
  title     = {Mining sequential patterns: generalization and performance improvements},
  year      = 1996,
  booktitle = edbt,
}

@inproceedings{agrawal94,
  author    = {R. Agrawal and R. Srikant},
  title     = {Fast Algorithms for Mining association rules},
  year      = 1994,
  booktitle = vldb,
}

% "type" replaces the default "Technical Report" label, so the report kind is
% not printed twice in front of the number.
@techreport{RA94report,
  author      = {R. Agrawal and R. Srikant},
  title       = {Fast algorithms for mining association rules in large databases},
  institution = {IBM Almaden Research Center},
  type        = {Research Report},
  number      = {RJ 9839},
  address     = {San Jose, California},
  month       = jun,
  year        = 1994,
}

@inproceedings{AD91,
  author    = {H. Almuallim and T. Dietterich},
  title     = {Learning with many irrelevant features},
  booktitle = {Proceedings of the Ninth National Conference on Artificial Intelligence},
  pages     = {547--552},
  year      = 1991,
}

% BBBBBBBBBBBBBBBBBB
% The key says "00" but the paper is from 1998; the key is kept so existing
% citations still resolve.
@inproceedings{Bayardo00,
  author    = {R. J. Bayardo},
  title     = {Efficiently Mining Long Patterns from Databases},
  booktitle = sigmod,
  pages     = {85--93},
  year      = 1998,
}

@inproceedings{rstar,
  author    = {N. Beckmann and H. P. Kriegel and R. Schneider and B. Seeger},
  title     = {The {R*-tree}: an efficient and robust access method for points and rectangles},
  booktitle = sigmod,
  pages     = {322--331},
  year      = 1990,
}


% "address" is reserved for a city, so the web location is carried in "note".
@manual{berkeley,
  title        = {The Berkeley Database (Berkeley {DB})},
  organization = {Sleepycat Software},
  note         = {http://www.sleepycat.com},
}

@inproceedings{bivar,
  author    = {J. Bioch and O. van der Meer and R. Potharst},
  title     = {Bivariate Decision Trees},
  year      = {1997},
  booktitle = {Principles of Data Mining and Knowledge Discovery},
}

% A data repository rather than a conference paper: the hosting department
% goes in "howpublished" of a misc entry instead of "booktitle".
@misc{UCI,
  author       = {C. Blake and C. Merz},
  title        = {{UCI} Repository of machine learning databases},
  howpublished = {Univ. of California, Dept. of Information and Computer Science},
  year         = 1998,
}


@article{onestop,
  author  = {Alvis Brazma and Alan Robinson and Graham Cameron and Michael Ashburner},
  title   = {One-stop shop for microarray data},
  journal = {Nature},
  volume  = 403,
  pages   = {699--700},
  year    = 2000,
}

@book{BF84,
  author    = {L. Breiman and J. Friedman and R. Olshen and C. Stone},
  title     = {Classification and Regression Trees},
  year      = 1984,
  publisher = {Wadsworth},
}

% A technical report: the report number and issuing department get their own
% fields instead of being packed into "booktitle".
@techreport{mulvar,
  author      = {C. Brodley and P. Utgoff},
  title       = {Multivariate Decision Trees},
  institution = {Dept. of Computer Sci., Uni. of Massachusetts},
  number      = {92-82},
  year        = 1992,
}

@techreport{lmdt,
  author      = "C. E. Brodley and Paul E. Utgoff",
  title       = "Multivariate versus univariate decision trees",
  institution = "Dept. of Computer Science, University of Massachusetts",
  number      = "COINS-CR-92-8",
  year        = "1992",
}

@article{dnaarray,
  author  = {P. O. Brown and D. Botstein},
  title   = {Exploring the new world of the genome with {DNA} microarrays},
  journal = {Nature Genetics},
  volume  = 21,
  pages   = {33--37},
  year    = 1999,
}

% CCCCCCCCCCCCCCCCCCC
@inproceedings{Car93,
  author    = {C. Cardie},
  title     = {Using decision trees to improve case-based learning},
  booktitle = icml,
  pages     = {25--32},
  year      = 1993,
}

% Published in a periodical bulletin, so it is recorded as an article.
% NOTE(review): volume, issue and pages are not given in this file - add them.
@article{Sur98,
  author  = {Surajit Chaudhuri},
  title   = {Data Mining and Database Systems: Where is the Intersection?},
  journal = {Bulletin of the IEEE Computer Society Technical Committee on Data Engineering},
  year    = 1998,
}

@inproceedings{enclus,
  author    = {C. H. Cheng and A. W. Fu and Y. Zhang},
  title     = {Entropy-based Subspace Clustering for Mining Numerical Data},
  booktitle = kdd,
  pages     = {84--93},
  year      = 1999,
}

@inproceedings{bicluster,
  author    = {Y. Cheng and G. Church},
  title     = {Biclustering of Expression Data},
  booktitle = {Proc. of 8th International Conference on Intelligent Systems for Molecular Biology},
  year      = 2000,
}

% The fourth author's given name was mis-encoded in the original text; it is
% restored with a LaTeX accent (dotless i under an acute).
@inproceedings{Coo01,
  author    = {Brian F. Cooper and Neal Sample and Michael Franklin and G{\'\i}sli Hjaltason and Moshe Shadmon},
  title     = {A Fast Index for Semistructured Data},
  booktitle = vldb,
  pages     = {341--350},
  month     = sep,
  year      = 2001,
}

% DDDDDDDDDDDDDDDDDDDDD
% A chapter in an edited book: editors, publisher and city are split out of
% the former free-text "booktitle" so that styles can format them.
@incollection{deogun97data,
  author    = {J. Deogun and V. Raghavan and A. Sarkar and H. Sever},
  title     = {Data mining: Research trends, challenges, and applications},
  booktitle = {Rough Sets and Data Mining: Analysis of Imprecise Data},
  editor    = {T. Y. Lin and N. Cercone},
  publisher = {Kluwer Academic Publishers},
  address   = {Boston, MA},
  pages     = {9--45},
  year      = 1997,
}

@inproceedings{genearray,
  author    = {P. D'haeseleer and S. Liang and R. Somogyi},
  title     = {Gene Expression Analysis and Genetic Network Modeling},
  year      = {1999},
  booktitle = {Pacific Symposium on Biocomputing},
}

% A book: misc entries ignore the "publisher" field in standard styles.
@book{dhart73,
  author    = {R. Duda and P. Hart},
  title     = {Pattern classification and scene analysis},
  publisher = {Wiley},
  year      = 1973,
}

% EEEEEEEEEEEEEEEEEEEEEE

@article{eisen98,
  author  = {M. Eisen and P. Spellman and P. Brown and D. Botstein},
  title   = {Cluster analysis and display of genome-wide expression patterns},
  journal = {Proc Natl Acad Sci U.S.A},
  volume  = 95,
  number  = 25,
  pages   = {14863--14868},
  year    = 1998,
}


@inproceedings{dbscan,
  author    = {M. Ester and H. Kriegel and J. Sander and X. Xu},
  title     = {A density-based algorithm for discovering clusters in large spatial databases with noise},
  booktitle = kdd,
  pages     = {226--231},
  year      = 1996,
}

@inproceedings{ekx95,
  author    = "M. Ester and H. Kriegel and X. Xu",
  title     = "Focussing techniques for efficient class identification",
  year      = "1995",
  booktitle = "Proc. of the 4th Intl. Sym. of Large Spatial Databases",
}

% FFFFFFFFFFFFFFFFFFFFFFFF
@inproceedings{faloutsos94fast,
  author    = {C. Faloutsos and M. Ranganathan and Y. Manolopoulos},
  title     = {Fast subsequence matching in time-series databases},
  year      = {1994},
  pages     = {419--429},
  booktitle = sigmod,
}

@inproceedings{ferragina96fast,
  author    = {Paolo Ferragina and Roberto Grossi},
  title     = {Fast String Searching in Secondary Storage: Theoretical Developments and Experimental Results},
  booktitle = {Proceedings of the ACM SODA},
  year      = 1996,
  pages     = {373--382},
  address   = {Atlanta},
}

@article{ferragina99string,
  author  = {Paolo Ferragina and Roberto Grossi},
  title   = {The string {B}-tree: a new data structure for string search in external memory and its applications},
  journal = {Journal of the ACM},
  year    = 1999,
  volume  = 46,
  number  = 2,
  pages   = {236--280},
}

% "Machine Learning" here is the journal, so the entry is an article.
% NOTE(review): volume, issue and pages supplied from memory - confirm.
@article{fisher87,
  author  = {D. H. Fisher},
  title   = {Knowledge acquisition via incremental conceptual clustering},
  journal = {Machine Learning},
  volume  = 2,
  number  = 2,
  pages   = {139--172},
  year    = 1987,
}

% The city moves from the end of "booktitle" into "address"; the trailing
% period is dropped because styles add their own punctuation.
@inproceedings{fu95meta-rule-guided,
  author    = {Y. Fu and J. Han},
  title     = {Meta-rule-guided mining of association rules in relational databases},
  booktitle = {Proc. 1st Int'l Workshop on Integration of Knowledge Discovery with Deductive and Object-Oriented Databases (KDOOD'95)},
  address   = {Singapore},
  pages     = {39--46},
  year      = 1995,
}

@book{fuk90,
  author    = {K. Fukunaga},
  title     = {Introduction to Statistical Pattern Recognition},
  year      = 1990,
  publisher = {Academic Press},
}

% GGGGGGGGGGGGGGGGGGGGGGGGGGG
@inproceedings{boat,
  author    = {J. Gehrke and V. Ganti and R. Ramakrishnan and W. Loh},
  title     = {{BOAT}-- optimistic decision tree construction},
  year      = {1999},
  booktitle = sigmod,
}

@inproceedings{rain,
  author    = {J. Gehrke and R. Ramakrishnan and V. Ganti},
  title     = {Rain{F}orest: A Framework for Fast Decision Tree Construction of Large Datasets},
  year      = {1998},
  booktitle = vldb,
}

% A chapter in a published book, hence incollection.
% NOTE(review): the page range is identical to entry GCH00 - possibly a
% copy/paste slip; verify against the book.
@incollection{patarray,
  author    = {G. Gonnet and R. Baeza-Yates and T. Snider},
  title     = {New indices for text: Pat trees and pat arrays},
  booktitle = {Information Retrieval: Data Structures and Algorithms},
  publisher = {Prentice Hall},
  pages     = {335--349},
  year      = 1992,
}

@inproceedings{dual,
  author    = {G. Grahne and L. V.S. Lakshmanan and X. Wang and M. H. Xie},
  title     = {On Dual Mining: From Patterns to Circumstances, and Back},
  year      = {2001},
  booktitle = icde,
}

% CURE was presented at SIGMOD 1998; the year previously read 1996.
@inproceedings{cure,
  author    = {S. Guha and R. Rastogi and K. Shim},
  title     = {{CURE}: An efficient clustering algorithm for large databases},
  booktitle = sigmod,
  year      = 1998,
}

@inproceedings{GCH00,
  author    = {G. Giuffrida and W. W. Chu and D. M. Hanssens},
  title     = {Mining Classification Rules from Datasets with Large Number of Many-Valued Attributes},
  booktitle = edbt,
  pages     = {335--349},
  year      = 2000,
}


@book{gusfield97,
  author    = "Dan Gusfield",
  title     = "Algorithms on Strings, Trees, and Sequences",
  year      = "1997",
  publisher = "Cambridge University Press",
}

@inproceedings{rtree,
  author    = {A. Guttman},
  title     = {{R-trees}: a dynamic index structure for spatial searching},
  booktitle = sigmod,
  pages     = {47--57},
  year      = 1984,
}

% HHHHHHHHHHHHHHHHHHHHHHHHHH
@inproceedings{han95discovery,
  author    = {J. Han and Y. Fu},
  title     = {Discovery of multiple-level association rules from large databases},
  year      = 1995,
  booktitle = vldb,
}
@inproceedings{han00prefix,
  author    = {J. Han and J. Pei and Y. Yin},
  title     = {Mining Frequent Patterns without Candidate Generation},
  booktitle = sigmod,
  year      = 2000,
}

@inproceedings{nnhigh,
  author    = {A. Hinneburg and C. Aggarwal and D. Keim},
  title     = {What is the nearest neighbor in high dimensional spaces?},
  year      = {2000},
  booktitle = vldb,
}

% The year is printed from the "year" field, so it is no longer repeated in
% the venue text.
@inproceedings{hipp98new,
  author    = {J. Hipp and A. Myka and R. Wirth and U. G{\"u}ntzer},
  title     = {A new algorithm for faster mining of generalized association rules},
  booktitle = {Proc. 2nd PKDD},
  year      = 1998,
}


@article{hughes00,
  author  = {Timothy R. Hughes and Matthew J. Marton and others},
  title   = {Functional Discovery via a Compendium of Expression Profiles},
  journal = {Cell},
  volume  = 102,
  pages   = {109--126},
  year    = 2000,
}

% IIIIIIIIIIIIIIIIIIIIIIIIIIII
% JJJJJJJJJJJJJJJJJJJJJJJJJJJJ

@inproceedings{mstring,
  author    = {H. V. Jagadish and N. Koudas and D. Srivastava},
  title     = {On Effective Multi-Dimensional Indexing for Strings},
  booktitle = sigmod,
  pages     = {403--414},
  year      = 2000,
}

@inproceedings{jagadish99,
  author    = {H. V. Jagadish and Jason Madar and Raymond Ng},
  title     = {Semantic Compression and Pattern Extraction with Fascicles},
  booktitle = vldb,
  pages     = {186--196},
  year      = 1999,
}

% KKKKKKKKKKKKKKKKKKKKKKKKKKKK
@inproceedings{kambermetarule-guided,
  author    = {M. Kamber and J. Han and J. Y. Chiang},
  title     = {Metarule-Guided Mining of Multi-Dimensional Association Rules Using Data Cubes},
  booktitle = kdd,
  pages     = {207--210},
  year      = 1997,
}

% The series name is separated from the publisher.
@book{kr90,
  author    = {L. Kaufman and P. Rousseeuw},
  title     = {Finding groups in data -- An introduction to cluster analysis},
  publisher = {Wiley},
  series    = {Wiley Series in Probability and Mathematical Statistics},
  year      = 1990,
}

% LLLLLLLLLLLLLLLLLLLLLLLLL
% A research report: "type" supplies the label printed before the number.
@techreport{ibm,
  author      = {B. Liu and Y. Ma and C. Wong and P. Yu},
  title       = {Target Selection via Scoring Using Association Rules},
  institution = {IBM Research},
  type        = {Research Report},
  number      = {RC 21694},
  year        = 2000,
}

% MMMMMMMMMMMMMMMMMMMMMMMMM
% The field names were misspelled ("volumn", "numer"), so BibTeX silently
% dropped the volume and issue from the output.
@article{suffixarray,
  author  = {U. Manber and G. Myers},
  title   = {Suffix Arrays: {A} New Method for On-Line String Searches},
  journal = {SIAM Journal On Computing},
  volume  = 22,
  number  = 5,
  pages   = {935--948},
  year    = 1993,
}

% ResearchIndex
% The empty OPT placeholder fields left by an editor template are removed;
% BibTeX ignored them.
@article{mc76,
  author  = {E. M. McCreight},
  title   = {A space-economical suffix tree construction algorithm},
  journal = {Journal of the ACM},
  volume  = 23,
  number  = 2,
  pages   = {262--272},
  month   = apr,
  year    = 1976,
}

% NOTE(review): address changed from "Zurich, Switzerland" to match the other
% KDD 2000 entry in this file (domingos00mining), which gives Boston, MA.
@inproceedings{target,
  author    = {Y. Ma and B. Liu and C. K. Wong and P. S. Yu and S. M. Lee},
  title     = {Targeting the Right Students Using Data Mining},
  booktitle = kdd,
  address   = {Boston, MA},
  month     = aug,
  year      = 2000,
}

@inproceedings{sliq,
  author    = {M. Mehta and R. Agrawal and J. Rissanen},
  title     = {SLIQ: A Fast Scalable Classifier for Data Mining},
  year      = {1996},
  booktitle = edbt,
}

@inproceedings{ms83,
  author    = {R. S. Michalski and R. E. Stepp},
  title     = {Learning from observation: conceptual clustering},
  booktitle = {Machine Learning: An Artificial Intelligence Approach},
  pages     = {331--363},
  year      = 1983,
}


% The venue is a journal despite its "Proceedings" name, so the entry is an
% article and the number formerly stored in "series" becomes its volume.
@article{mikimouse,
  author  = {R. Miki and others},
  title   = {Delineating developmental and metabolic pathways in vivo by expression profiling using the {RIKEN} set of 18,816 full-length enriched mouse {cDNA} arrays},
  journal = {Proceedings of National Academy of Sciences},
  volume  = 98,
  pages   = {2199--2204},
  year    = 2001,
}

% A journal article.
% NOTE(review): volume, issue and pages supplied from memory - confirm.
@article{surveyclus,
  author  = {F. Murtagh},
  title   = {A survey of recent hierarchical clustering algorithms},
  journal = {The Computer Journal},
  volume  = 26,
  number  = 4,
  pages   = {354--359},
  year    = 1983,
}

% A journal article; the misspelled "volumn" field was being ignored.
@article{oc1,
  author  = {S. K. Murthy and S. Kasif and S. Salzberg},
  title   = {A System for Induction of Oblique Decision Trees},
  journal = {Journal of Artificial Intelligence Research},
  volume  = 2,
  pages   = {1--32},
  year    = 1994,
}

% NNNNNNNNNNNNNNNNNNNNNNNNNN

@techreport{mafia,
  author      = "H. Nagesh and S. Goil and A. Choudhary",
  title       = "MAFIA: Efficient and scalable subspace clustering for very large data sets",
  institution = "Northwestern University",
  number      = "9906-010",
  year        = "1999",
}

@inproceedings{clarans,
  author    = {R. T. Ng and J. Han},
  title     = {Efficient and effective clustering methods for spatial data mining},
  year      = 1994,
  booktitle = vldb,
}

@inproceedings{ng98exploratory,
  author    = {R. Ng and L. Lakshmanan and J. Han and A. Pang},
  title     = {Exploratory mining and pruning optimizations of constrained association rules},
  booktitle = sigmod,
  pages     = {13--24},
  year      = 1998,
}

% OOOOOOOOOOOOOOOOOOOOOOOOOOOO
% PPPPPPPPPPPPPPPPPPPPPPPPPPPP
@inproceedings{landmark,
  author    = {Chang-Shing Perng and Haixun Wang and Sylvia R. Zhang and D. Stott Parker},
  title     = {Landmarks: a New Model for Similarity-based Pattern Querying in Time Series Databases},
  booktitle = icde,
  pages     = {33--42},
  year      = 2000,
}

@inproceedings{farm,
  author    = {Chang-Shing Perng and Haixun Wang and Sheng Ma and Joseph L Hellerstein},
  title     = {A Framework for Exploring Mining Spaces with Multiple Attributes},
  year      = {2001},
  booktitle = icdm,
}

% QQQQQQQQQQQQQQQQQQQQQQQQQQQQ
@book{c45,
  author    = {J. Ross Quinlan},
  title     = {C4.5: Programs for Machine Learning},
  year      = {1993},
  publisher = {Morgan Kaufmann},
}

% RRRRRRRRRRRRRRRRRRRRRRRRRRRR

@misc{geml,
  title        = "{GEML} Expression Markup Language",
  author       = "Rosetta Biosoftware",
  howpublished = "http://www.rosettabio.com/products/conductor/geml/default.htm",
}
@misc{mage,
  title        = "{MAGE-ML}: MicroArray Gene Expression Markup Language",
  author       = "{MGED} Group",
  howpublished = "http://www.mged.org/Workgroups/MAGE/introduction.html",
}

% SSSSSSSSSSSSSSSSSSSSSSSSSSSS
@inproceedings{sprint,
  author    = {C. Shafer and R. Agrawal and M. Mehta},
  title     = {SPRINT: A Scalable Parallel Classifier for Data Mining},
  year      = {1996},
  booktitle = vldb,
}

@inproceedings{sha95,
  author    = {U. Shardanand and P. Maes},
  title     = {Social Information Filtering: Algorithms for Automating `Word of Mouth'},
  booktitle = {Proceedings of ACM CHI},
  pages     = {210--217},
  year      = 1995,
}

% Names must be separated by " and "; the former comma made BibTeX read
% "K. Ong, B. Mitbander" as a single person.  The editors move out of
% "booktitle" into "editor", which makes this an incollection entry.
@incollection{shen96metaqueries,
  author    = {W. Shen and K. Ong and B. Mitbander and C. Zaniolo},
  title     = {Metaqueries for data mining},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  editor    = {U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and R. Uthurusamy},
  publisher = {AAAI/MIT Press},
  pages     = {375--398},
  year      = 1996,
}

@inproceedings{sequential95,
  author    = {R. Agrawal and R. Srikant},
  title     = {Mining sequential patterns},
  year      = 1995,
  booktitle = icde,
}

% Same paper as entry RA95; both keys are kept.
@inproceedings{srikant95mining,
  author    = {R. Srikant and R. Agrawal},
  title     = {Mining generalized association rules},
  year      = 1995,
  pages     = {407--419},
  booktitle = vldb,
}

% Same paper as entry srikant95mining; both keys are kept.
@inproceedings{RA95,
  author    = {Ramakrishnan Srikant and Rakesh Agrawal},
  title     = {Mining Generalized Association Rules},
  booktitle = vldb,
  address   = {Zurich, Switzerland},
  month     = sep,
  year      = 1995,
}

@inproceedings{sr96,
  author    = {R. Srikant and R. Agrawal},
  title     = {Mining quantitative association rules in large relational tables},
  year      = 1996,
  booktitle = sigmod,
}

% NOTE(review): page range changed from 67-93 to 67--73 from memory of the
% proceedings; confirm against the published paper.
@inproceedings{srikant-mining,
  author    = {R. Srikant and Q. Vu and R. Agrawal},
  title     = {Mining association rules with item constraints},
  booktitle = kdd,
  pages     = {67--73},
  year      = 1997,
}

% TTTTTTTTTTTTTTTTTTTTTTTTTT
% UUUUUUUUUUUUUUUUUUUUUUUUUU
@inproceedings{UB90,
  author    = {P. E. Utgoff and C. E. Brodley},
  title     = {An incremental method for finding multivariate splits for decision trees},
  booktitle = icml,
  pages     = {58--65},
  year      = 1990,
}

%
% The misspelled "volumn" field was being ignored; the page range is written
% out in full.
@article{ue92,
  author  = {E. Ukkonen},
  title   = {Constructing suffix-trees on-line in linear time},
  journal = {Algorithms, Software, Architecture: Information Processing},
  volume  = 1,
  pages   = {484--492},
  year    = 1992,
}

% VVVVVVVVVVVVVVVVVVVVVVVVVV
% WWWWWWWWWWWWWWWWWWWWWWWWWW
@inproceedings{pcluster,
  author    = {Haixun Wang and Wei Wang and Jiong Yang and Philip S. Yu},
  title     = {Clustering by Pattern Similarity in Large Data Sets},
  year      = {2002},
  booktitle = sigmod,
}

@inproceedings{weighted,
  author    = {Haixun Wang and Chang-shing Perng and Wei Fan and Sanghyun Park  and Philip S. Yu},
  title     = {Indexing Weighted Sequences in Large Databases},
  year      = {2003},
  booktitle = icde,
}

% NOTE(review): "Chales" read as a typo for "Charles"; other entries in this
% file spell this author "Chang-Shing Perng" - confirm the preferred form.
@misc{wseq,
  author       = {Haixun Wang and Charles Perng and Wei Fan and Philip S. Yu},
  title        = {Indexing Weighted-Sequences in Large Databases},
  howpublished = {Submitted for publication},
  year         = 2002,
}

@inproceedings{cmp,
  author    = {Haixun Wang and Carlo Zaniolo},
  title     = {{CMP}: A Fast Decision Tree Classifier Using Multivariate Predictions},
  booktitle = icde,
  pages     = {449--460},
  year      = 2000,
}

% A technical report: number and department split out of "booktitle".
@techreport{weiss,
  author      = {G. Weiss and F. Provost},
  title       = {The Effect of Class Distribution on Classifier Learning},
  institution = {Dept. of Computer Sci., Rutgers University},
  number      = {ML-TR-43},
  year        = 2001,
}

% A journal article; the misspelled "volumn" field was being ignored.
@article{hetero,
  author  = {D. Wilson and T. Martinez},
  title   = {Improved Heterogeneous Distance Functions},
  journal = {Journal of Artificial Intelligence Research},
  volume  = 6,
  pages   = {1--34},
  year    = 1997,
}

% XXXXXXXXXXXXXXXXXXXXXXXXXX
% YYYYYYYYYYYYYYYYYYYYYYYYYY
% The math is braced so that style-driven case changes leave it untouched.
@inproceedings{deltaicde,
  author    = {Jiong Yang and Wei Wang and Haixun Wang and Philip S Yu},
  title     = {{$\delta$}-Clusters: Capturing Subspace Correlation in a Large Data Set},
  booktitle = icde,
  pages     = {517--528},
  year      = 2002,
}

% ZZZZZZZZZZZZZZZZZZZZZZZZZZ
% A technical report: number and department split out of "booktitle".
@techreport{elkan,
  author      = {B. Zadrozny and C. Elkan},
  title       = {Learning and Making Decisions When Costs and Probabilities are Both Unknown},
  institution = {Dept. of Computer Sci., UCSD},
  number      = {CS2001-0664},
  year        = 2001,
}

@inproceedings{birch,
  author    = {T. Zhang and R. Ramakrishnan and M. Livny},
  title     = {BIRCH: An efficient data clustering method for very large databases},
  year      = 1996,
  pages     = {103--114},
  booktitle = sigmod,
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A downloadable data set, not a conference paper: the URL goes in
% "howpublished" of a misc entry.
@misc{yeast,
  author       = {S. Tavazoie and J. Hughes and M. Campbell and R. Cho and G. Church},
  title        = {Yeast Micro Data Set},
  howpublished = {http://arep.med.harvard.edu/biclustering/yeast.matrix},
  year         = 2000,
}

% A data set reachable by URL; misc does not require the "year" that the
% former inproceedings type warned about.
@misc{movie,
  author       = {John Riedl and Joseph Konstan},
  title        = {MovieLens Dataset},
  howpublished = {http://www.cs.umn.edu/Research/GroupLens},
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Uses the shared venue macro instead of a literal "VLDB".
@inproceedings{Gol97,
    author    = {R. Goldman and J. Widom},
    title     = {{DataGuides}: Enabling query formulation and optimization in semistructured databases},
    booktitle = vldb,
    pages     = {436--445},
    month     = aug,
    year      = 1997,
}

@inproceedings{Mil99,
    author    = {T. Milo and D. Suciu},
    title     = {Index structures for path expressions},
    booktitle = {Proceedings of 7th International Conference on Database Theory (ICDT)},
    pages     = {277--295},
    month     = jan,
    year      = 1999,
}

% Uses the shared venue macro instead of a literal "VLDB".
@inproceedings{Li01,
    author    = {Q. Li and B. Moon},
    title     = {Indexing and querying {XML} data for regular path expressions},
    booktitle = vldb,
    pages     = {361--370},
    month     = sep,
    year      = 2001,
}

% DISABLED duplicate: the key Coo01 is already defined earlier in this file.
% Classic BibTeX reports "Repeated entry" and ignores the second definition,
% so this copy is switched off by dropping its leading at-sign.  Citations of
% Coo01 continue to resolve to the earlier entry.
% inproceedings{Coo01,
%     author = "B. Cooper and N. Sample and M. J. Franklin and G. R. Hjaltason
%               and M. Shadmon",
%     title = "A fast index for semistructured data",
%     booktitle = "VLDB",
%     year = "2001",
%     month = "September",
%     pages = "341-350"
% }

% Uses the shared venue macro, like the other SIGMOD entries in this file.
@inproceedings{Chu02,
    author    = {C. Chung and J. Min and K. Shim},
    title     = {{APEX}: An adaptive path index for {XML} data},
    booktitle = sigmod,
    month     = jun,
    year      = 2002,
}

% Same paper as entry f+b; the third author's initials now agree with that
% entry.  Both keys are kept.
@inproceedings{Kau02,
    author    = {R. Kaushik and P. Bohannon and J. F. Naughton and H. Korth},
    title     = {Covering indexes for branching path queries},
    booktitle = sigmod,
    month     = jun,
    year      = 2002,
}

@book{Abi99,
    author    = {S. Abiteboul and P. Buneman and D. Suciu},
    title     = {Data on the web: from relations to semistructured data and {XML}},
    year      = 1999,
    publisher = {Morgan Kaufmann Publishers},
    address   = {Los Altos, CA 94022, USA},
}



@inproceedings{Fle02,
    author    = {S. Flesca and F. Furfaro and S. Greco},
    title     = {A Graphical Query Language},
    year      = 2002,
    booktitle = icde,
}

@inproceedings{xmlgl,
    author    = {S. Ceri and S. Comai and E. Damiani and P. Fraternali and S. Paraboschi and L. Tanca},
    title     = {{XML-GL}: A graphical language for querying and restructuring {XML} documents},
    booktitle = {Proceedings of the 8th International World Wide Web Conference},
    pages     = {93--109},
    year      = 1999,
}

% Same paper as entry Kau02; both keys are kept.
@inproceedings{f+b,
    author    = {R. Kaushik and P. Bohannon and J. F. Naughton and H. F. Korth},
    title     = {Covering Indexes for Branching Path Queries},
    year      = {2002},
    booktitle = sigmod,
}


@article{lorel,
  author  = {S. Abiteboul and D. Quass and J. McHugh and J. Widom and J. L. Wiener},
  title   = {The {LOREL} query language for semistructured data},
  journal = {International Journal on Digital Libraries},
  volume  = 1,
  number  = 1,
  pages   = {68--88},
  year    = 1997,
}

@techreport{xquery,
  author      = "D. Chamberlin and D. Florescu and J. Robie and J. Simon and M. Stefanescu",
  title       = "{XQuery}: A query language for {XML} {W3C} working draft",
  institution = "World Wide Web Consortium",
  number      = "WD-xquery-20010215",
  year        = "2001",
}
@techreport{xpath,
  author      = {J. Clark and S. DeRose},
  title       = {{XML} path language ({XPath}) version 1.0 {W3C} recommendation},
  institution = {World Wide Web Consortium},
  number      = {REC-xpath-19991116},
  year        = 1999,
}

@inproceedings{quilt,
  author    = {D. Chamberlin and J. Robie and D. Florescu},
  title     = {Quilt: An {XML} query language for heterogeneous data sources},
  booktitle = {WebDB},
  month     = may,
  year      = 2000,
}

@inproceedings{xmlql,
  author    = {A. Deutsch and M. Fernandez and D. Florescu and A. Levy and D. Suciu},
  title     = {A query language for {XML}},
  booktitle = {Proceedings of the 8th International World Wide Web Conference},
  pages     = {77--91},
  month     = may,
  year      = 1999,
}

@inproceedings{cohen02,
  author    = {Edith Cohen and Haim Kaplan and Tova Milo},
  title     = {Labeling Dynamic {XML} Trees},
  booktitle = pods,
  pages     = {271--281},
  year      = 2002,
}

% "Research Report" is the report kind, not a number, so it moves to "type";
% in "number" it was printed after the default "Technical Report" label.
@techreport{isoreport,
  author      = {H. Wang and C. Perng and W. Fan and S. Park and P. Yu},
  title       = {Indexing Weighted-Sequences in Large Databases},
  institution = {IBM T. J. Watson Research Center},
  type        = {Research Report},
  address     = {Hawthorne, NY},
  month       = feb,
  year        = 2002,
}

% With no author, standard styles need "key" for sorting and labelling;
% without it BibTeX warns "to sort, need author or key".
@misc{imdb,
  key          = {IMDb},
  title        = {The Internet Movie Database},
  howpublished = {http://www.imdb.com},
  year         = 2000,
}

% A raw tilde is a non-breaking space in LaTeX text, so it is written as an
% explicit tilde glyph (this file escapes URL characters by hand elsewhere).
@misc{dblp,
  author       = {Michael Ley},
  title        = {{DBLP} database web site},
  howpublished = {http://www.informatik.uni-trier.de/{\textasciitilde}ley/db},
  year         = 2000,
}

% "key" gives author-less entries a sort/label key; the raw tilde in the URL
% (a non-breaking space in LaTeX text) is written as an explicit glyph.
@misc{xmark,
  key          = {XMARK},
  title        = {{XMARK}: The {XML}-benchmark project},
  howpublished = {http://monetdb.cwi.nl/{\textasciitilde}xml},
  year         = 2002,
}

@techreport{xmarkreport,
  author      = "A. R. Schmidt and F. Waas and M. L. Kersten and D. Florescu and I. Manolescu and M. J. Carey and R. Busse",
  title       = "The {XML} Benchmark Project",
  institution = "Centrum voor Wiskunde en Informatica",
  number      = "INS-R0103",
  year        = "2001",
}

@inproceedings{abiteboul01,
  author    = {S. Abiteboul and H. Kaplan and T. Milo},
  title     = {Compact labeling schemes for ancestor queries},
  booktitle = {Proc. {ACM-SIAM} Symposium on Discrete Algorithms (SODA)},
  year      = 2001,
}

@inproceedings{kaplan02,
  author    = {H. Kaplan and T. Milo and R. Shabo},
  title     = {A comparison of labeling schemes for ancestor queries},
  booktitle = {Proc. {ACM-SIAM} Symposium on Discrete Algorithms (SODA)},
  year      = 2002
}

@inproceedings{alstrup02,
  author    = {S. Alstrup and T. Rauhe},
  title     = {Improved labeling scheme for ancestor queries},
  booktitle = {Proc. {ACM-SIAM} Symposium on Discrete Algorithms (SODA)},
  year      = 2002
}

@inproceedings{domingos00mining,
  author    = {P. Domingos and G. Hulten},
  title     = {Mining high-speed data streams},
  booktitle = kdd,
  pages     = {71--80},
  address   = {Boston, MA},
  year      = {2000},
  publisher = {ACM Press},
}

@inproceedings{hulten01time,
  author    = {G. Hulten and L. Spencer and P. Domingos},
  title     = {Mining Time-Changing Data Streams},
  booktitle = kdd,
  year      = {2001},
  pages     = {97--106},
  address   = {San Francisco, CA},
  publisher = {ACM Press},
}

@inproceedings{chen02regres,
  author    = {Y. Chen and G. Dong and J. Han and B. W. Wah and J. Wang},
  title     = {Multi-Dimensional Regression Analysis of Time-Series Data Streams},
  booktitle = vldb,
  year      = 2002,
  address   = {Hong Kong, China}
}



% Technical-report version of the paper cited as hifi.
@techreport{hifireport,
  author      = {Haixun Wang and Chang-Shing Perng and Sheng Ma and Philip S. Yu},
  title       = {Mining Associations by Pattern Structure in Large Relational Tables},
  institution = {IBM T. J. Watson},
  year        = 2002
}

@inproceedings{hifi,
  author    = {Haixun Wang and Chang-Shing Perng and Sheng Ma and Philip S. Yu},
  title     = {Mining Associations by Pattern Structure in Large Relational Tables},
  booktitle = icdm,
  address   = {Maebashi City, Japan},
  year      = 2002,
  month     = dec
}

@article{hoeffding,
  author  = {W. Hoeffding},
  title   = {Probability Inequalities for Sums of Bounded Random Variables},
  journal = {Journal of the American Statistical Association},
  year    = 1963,
  volume  = 58,
  pages   = {13--30}
}

% The fourth author's given name was stored as mis-encoded bytes; it is
% written here with a LaTeX accent escape so classic BibTeX parses it.
@inproceedings{triexml,
  author    = {Brian F. Cooper and Neal Sample and Michael Franklin and G{\'\i}sli Hjaltason and Moshe Shadmon},
  title     = {A Fast Index for Semistructured Data},
  booktitle = vldb,
  pages     = {341--350},
  year      = 2001
}



@inproceedings{Ferhatosmanoglu01,
  author    = {Hakan Ferhatosmanoglu and Ertem Tuncel and Divyakant Agrawal and Amr El Abbadi},
  title     = {Approximate Nearest Neighbor Searching in Multimedia Databases},
  booktitle = icde,
  pages     = {503--511},
  year      = 2001
}

@inproceedings{korn00influence,
  author    = {Flip Korn and S. Muthukrishnan},
  title     = {Influence sets based on reverse nearest neighbor queries},
  booktitle = sigmod,
  pages     = {201--212},
  year      = {2000}
}


@article{faloutsos94,
  author  = {C. Faloutsos and R. Barber and M. Flickner and J. Hafner},
  title   = {Efficient and Effective Querying by Image Content},
  journal = {Journal of Intelligent Information Systems},
  year    = 1994,
  volume  = 3,
  pages   = {231--262}
}

@inproceedings{seidl97,
  author    = {T. Seidl and H. P. Kriegel},
  title     = {Efficient User-Adaptable Similarity Search in Large Multimedia Databases},
  booktitle = vldb,
  year      = {1997}
}

@article{liao02,
  author  = {Yihua Liao and V. Rao Vemuri},
  title   = {Use of K-Nearest Neighbor Classifier for Intrusion Detection},
  journal = {Computers and Security},
  year    = 2002,
  volume  = 21,
  number  = 5,
  pages   = {439--448}
}

@inproceedings{bonchi99,
  author    = {F. Bonchi and F. Giannotti and G. Mainetto and D. Pedreschi},
  title     = {Using Data Mining Techniques in Fiscal Fraud Detection},
  booktitle = {International Conference on Data Warehousing and Knowledge Discovery},
  pages     = {369--376},
  year      = 1999
}

@inproceedings{agrawal95,
  author    = {R. Agrawal and K. I. Lin and H. S. Sawhney and K. Shim},
  title     = {Fast Similarity Search in the Presence of Noise, Scaling, and Translation in Time-Series Databases},
  booktitle = vldb,
  pages     = {490--501},
  year      = 1995
}

@inproceedings{agrawal93,
  author    = {Rakesh Agrawal and Christos Faloutsos and Arun N. Swami},
  title     = {Efficient Similarity Search In Sequence Databases},
  booktitle = {International Conference of Foundations of Data Organization and Algorithms ({FODO})},
  pages     = {69--84},
  year      = {1993}
}


@inproceedings{beyer99,
  author    = {K. Beyer and J. Goldstein and R. Ramakrishnan and U. Shaft},
  title     = {When Is ``Nearest Neighbor'' Meaningful?},
  booktitle = icdt,
  pages     = {217--235},
  year      = 1999
}

% Venue uses the shared focs macro so it follows the file-wide long/short switch.
@inproceedings{indyk98,
  author    = {Piotr Indyk},
  title     = {On Approximate Nearest Neighbors in {Non-Euclidean} Spaces},
  booktitle = focs,
  pages     = {148--155},
  year      = {1998},
}


% Accented names were stored as mis-encoded bytes containing stray commas,
% which also broke name parsing; they are written here with LaTeX accent
% escapes. The key says 99 but the article appeared in 2001; the key is kept
% so existing citations still resolve.
@article{chavez99,
  author  = {Edgar Ch{\'a}vez and Gonzalo Navarro and Ricardo A. Baeza-Yates and Jos{\'e} L. Marroqu{\'\i}n},
  title   = {Searching in metric spaces},
  journal = {ACM Computing Surveys},
  volume  = {33},
  number  = {3},
  pages   = {273--321},
  year    = {2001},
}


@inproceedings{gao02queries,
  author    = {L. Gao and X. Wang},
  title     = {Continually Evaluating Similarity-Based Pattern Queries on a Streaming Time Series},
  booktitle = sigmod,
  address   = {Madison, Wisconsin},
  year      = 2002,
  month     = jun
}

@inproceedings{roussopoulos95,
  author    = {N. Roussopoulos and S. Kelley and F. Vincent},
  title     = {Nearest Neighbor Queries},
  booktitle = sigmod,
  pages     = {71--79},
  address   = {San Jose, CA},
  year      = 1995,
  month     = may
}

@article{fukunaga75,
  author  = {K. Fukunaga and P. M. Narendra},
  title   = {A branch and bound algorithm for computing k-nearest neighbors},
  journal = {IEEE Trans. on Computers (TOC)},
  year    = 1975,
  volume  = 24,
  number  = 7,
  pages   = {750--753},
  month   = jul
}

@inproceedings{brin95,
  author    = {Sergey Brin},
  title     = {Near Neighbor Search in Large Metric Spaces},
  booktitle = vldb,
  address   = {Switzerland},
  year      = {1995}
}


@inproceedings{hill95,
  author    = {William Hill and Lawrence Stead and M. Rosenstein and G. Furnas},
  title     = {Recommending and evaluating choices in a virtual community of use},
  booktitle = {Proceedings of ACM CHI},
  pages     = {194--201},
  year      = 1995
}


@proceedings{soboroff99,
  editor = {I. Soboroff and C. Nicholas and M. Pazzani},
  title  = {Proceedings of the SIGIR-99 Workshop on Recommender Systems},
  year   = {1999}
}

% I. Soboroff, C. Nicholas, and M. Pazzani, editors. Proceedings of the SIGIR-99 Workshop on Recommender Systems, Berkeley, California, 1999.


@article{cohen00,
  author  = {William W. Cohen and Wei Fan},
  title   = {{Web}-collaborative filtering: recommending music by crawling the {Web}},
  journal = {Computer Networks (Amsterdam, Netherlands: 1999)},
  year    = {2000},
  volume  = {33},
  number  = {1--6},
  pages   = {685--698}
}


@article{jacobs00,
  author  = {David W. Jacobs and Daphna Weinshall and Yoram Gdalyahu},
  title   = {Classification with Nonmetric Distances: Image Retrieval and Class Representation},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {22},
  number  = {6},
  pages   = {583--600},
  year    = {2000},
}

@article{uhlmann91,
  author  = {J. K. Uhlmann},
  title   = {Satisfying general proximity/similarity queries with metric trees},
  journal = {Information Processing Letters},
  year    = 1991,
  volume  = 40,
  number  = 4,
  month   = nov
}

@inproceedings{yianilos93,
  author    = {Peter N. Yianilos},
  title     = {Data Structures and Algorithms for Nearest Neighbor Search in General Metric Spaces},
  booktitle = {{SODA}: {ACM}-{SIAM} Symposium on Discrete Algorithms},
  year      = {1993}
}


@article{bauer99empirical,
  author  = {Eric Bauer and Ron Kohavi},
  title   = {An Empirical Comparison of Voting Classification Algorithms: Bagging, Boosting, and Variants},
  journal = {Machine Learning},
  volume  = {36},
  number  = {1--2},
  pages   = {105--139},
  year    = {1999},
}


@article{dietterich00ensemble,
  author  = {Thomas G. Dietterich},
  title   = {Ensemble Methods in Machine Learning},
  journal = {Lecture Notes in Computer Science},
  volume  = {1857},
  pages   = {1--15},
  year    = {2000},
}

@article{dietterich00experimental,
  author  = {Thomas G. Dietterich},
  title   = {An Experimental Comparison of Three Methods for Constructing Ensembles of Decision Trees: Bagging, Boosting, and Randomization},
  journal = {Machine Learning},
  volume  = {40},
  number  = {2},
  pages   = {139--157},
  year    = {2000},
  url     = {citeseer.nj.nec.com/dietterich98experimental.html}
}

@inproceedings{freund96experiments,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {Experiments with a New Boosting Algorithm},
  booktitle = icml,
  pages     = {148--156},
  year      = {1996},
  url       = {citeseer.nj.nec.com/freund96experiments.html}
}

@inproceedings{hall00distr,
  author    = {L. Hall and K. Bowyer and W. Kegelmeyer and T. Moore and C. Chao},
  title     = {Distributed learning on very large data sets},
  booktitle = {Workshop on Distributed and Parallel Knowledge Discovery},
  year      = 2000
}

@inproceedings{ho95random,
  author    = {T. K. Ho},
  title     = {Random decision forests},
  booktitle = {Proceedings of the 3rd International Conference on Document Analysis and Recognition},
  pages     = {278--282},
  year      = 1995
}

@techreport{breiman96online,
  author      = {L. Breiman},
  title       = {Pasting bites together for prediction in large data sets and on-line},
  institution = {Statistics Dept., UC Berkeley},
  url         = {ftp.stat.berkeley.edu/pub/users/breiman/pastebite.ps.Z},
  year        = {1996}
}

@article{tumer96error,
  author  = {Kagan Tumer and Joydeep Ghosh},
  title   = {Error Correlation and Error Reduction in Ensemble Classifiers},
  journal = {Connection Science},
  volume  = {8},
  number  = {3--4},
  pages   = {385--403},
  year    = {1996},
  url     = {citeseer.nj.nec.com/tumer96error.html}
}


@inproceedings{stolfo97credit,
  key       = {Fan},
  author    = {S. Stolfo and W. Fan and W. Lee and A. Prodromidis and P. Chan},
  title     = {Credit Card Fraud Detection Using Meta-learning: Issues and Initial Results},
  booktitle = {AAAI-97 Workshop on Fraud Detection and Risk Management},
  year      = {1997}
}

@inproceedings{kohavi96bias,
  author    = {Ron Kohavi and David H. Wolpert},
  title     = {Bias Plus Variance Decomposition for Zero-One Loss Functions},
  booktitle = icml,
  year      = {1996},
  pages     = {275--283},
  url       = {citeseer.nj.nec.com/kohavi96bias.html}
}

@inproceedings{domingos00unified,
  author    = {P. Domingos},
  title     = {A Unified Bias-Variance Decomposition and its Applications},
  booktitle = icml,
  year      = {2000},
  pages     = {231--238}
}




@article{geman92neural,
  author  = {S. Geman and E. Bienenstock and R. Doursat},
  title   = {Neural Networks and the Bias/Variance Dilemma},
  journal = {Neural Computation},
  year    = 1992,
  volume  = 4,
  number  = 1,
  pages   = {1--58}
}


@inproceedings{keogh01locally,
  author    = {E. Keogh and K. Chakrabarti and S. Mehrotra and M. Pazzani},
  title     = {Locally Adaptive Dimensionality Reduction for Indexing Large Time Series Databases},
  booktitle = sigmod,
  url       = {citeseer.nj.nec.com/keogh01locally.html},
  year      = {2001}
}


@inproceedings{babcock02stream,
  author    = {B. Babcock and S. Babu and M. Datar and R. Motwani and J. Widom},
  title     = {Models and issues in data stream systems},
  booktitle = pods,
  year      = 2002
}



@inproceedings{garofalakis02stream,
  author    = {M. Garofalakis and J. Gehrke and R. Rastogi},
  title     = {Querying and mining data streams: You only get one look},
  booktitle = sigmod,
  year      = {2002}
}



@inproceedings{greenwald01space,
  author    = {M. Greenwald and S. Khanna},
  title     = {Space-Efficient Online Computation of Quantile Summaries},
  booktitle = sigmod,
  pages     = {58--66},
  address   = {Santa Barbara, CA},
  year      = 2001,
  month     = may
}

@article{utgoff89inc,
  author  = {P. E. Utgoff},
  title   = {Incremental induction of decision trees},
  journal = {Machine Learning},
  year    = 1989,
  volume  = 4,
  pages   = {161--186}
}

@inproceedings{street01streaming,
  author    = {W. Nick Street and YongSeog Kim},
  title     = {A Streaming Ensemble Algorithm ({SEA}) for Large-Scale Classification},
  booktitle = kdd,
  year      = {2001}
}

@article{babu01continuous,
  author  = {S. Babu and J. Widom},
  title   = {Continuous queries over data streams},
  journal = {{SIGMOD} Record},
  year    = 2001,
  volume  = 30,
  pages   = {109--120}
}

@inproceedings{guha00clustering,
  author    = {S. Guha and N. Mishra and R. Motwani and L. O'Callaghan},
  title     = {Clustering data streams},
  booktitle = focs,
  pages     = {359--366},
  year      = 2000
}

% Placeholder entry that stands in for a citation withheld during double-blind review.
@inproceedings{blind,
  title = {reference removed for double blind reviewing}
}

@book{Mitchell-ML,
  key       = {Mitchell},
  author    = {Tom M. Mitchell},
  title     = {Machine Learning},
  publisher = {McGraw Hill},
  year      = {1997}
}


@inproceedings{Cohen-95,
  key       = {Cohen},
  author    = {William Cohen},
  title     = {Fast Effective Rule Induction},
  booktitle = icml,
  pages     = {115--123},
  year      = {1995}
}


@inproceedings{fan02,
  author    = {Wei Fan and Fang Chu and Haixun Wang and Philip S. Yu},
  title     = {Pruning and dynamic scheduling of cost-sensitive ensembles},
  booktitle = {Proceedings of the 18th National Conference on Artificial Intelligence (AAAI)},
  year      = {2002}
}


@inproceedings{fan02cost,
  author    = {W. Fan and H. Wang and P. Yu and S. Stolfo},
  title     = {A Framework for Scalable Cost-sensitive Learning Based on Combining Probabilities and Benefits},
  booktitle = sdm,
  year      = {2002}
}

@inproceedings{fan02progress,
  author    = {W. Fan and H. Wang and P. Yu and S. Lo},
  title     = {Progressive Modeling},
  booktitle = icdm,
  year      = {2002}
}

@inproceedings{fan03one,
  author    = {W. Fan and H. Wang and P. Yu and S. Lo},
  title     = {Inductive Learning in Less than One Sequential Scan},
  booktitle = ijcai,
  year      = {2003}
}

 @inproceedings{streamensemble,
  author    = {Haixun Wang and Wei Fan and Philip S. Yu and Jiawei Han},
  title     = {Mining Concept-Drifting Data Streams using Ensemble Classifiers},
  booktitle = kdd,
  year      = {2003}
}

% The citation key keeps its historical spelling so existing citations still
% resolve; only the author field carries the corrected surname.
@inproceedings{zadronzny01,
  author    = {B. Zadrozny and C. Elkan},
  title     = {Obtaining Calibrated Probability Estimates from Decision Trees and Naive {Bayesian} Classifiers},
  booktitle = icml,
  year      = 2001
}

@inproceedings{li01cmar,
  author    = {Wenmin Li and Jiawei Han and Jian Pei},
  title     = {{CMAR}: Accurate and efficient classification based on multiple class-association rules},
  booktitle = icdm,
  year      = {2001}
}

@inproceedings{liu98cba,
  author    = {Bing Liu and Wynne Hsu and Yiming Ma},
  title     = {Integrating Classification and Association Rule Mining},
  booktitle = kdd,
  year      = {1998}
}

@inproceedings{manku02fpmining,
  author    = {G. Manku and R. Motwani},
  title     = {Approximate frequency counts over data streams},
  booktitle = vldb,
  year      = {2002}
}

@inproceedings{Chang03estDec,
  author    = {J. H. Chang and W. S. Lee},
  title     = {Finding recent frequent itemsets adaptively over online data streams},
  booktitle = kdd,
  year      = {2003}
}


@inproceedings{yun04moment,
  author    = {Yun Chi and Haixun Wang and Philip S. Yu and Richard R. Muntz},
  title     = {Moment: Maintaining Closed Frequent Itemsets over a Stream Sliding Window},
  booktitle = icdm,
  year      = 2004
}


@inproceedings{fankdd04,
  author    = {Wei Fan},
  title     = {Systematic data selection to mine concept-drifting data streams},
  booktitle = kdd,
  year      = {2004}
}



@inproceedings{wangicdm05,
  author    = {Peng Wang and Haixun Wang and Xiaochen Wu and Wei Wang and Baile Shi},
  title     = {On Reducing Classifier Granularity in Mining Concept-Drifting Data Streams},
  booktitle = icdm,
  year      = {2005}
}

% Technical-report version of wangicdm05. The download location lives in the
% note field so that institution holds only the issuing organisation.
@techreport{wangtech05,
  author      = {Peng Wang and Haixun Wang and Xiaochen Wu and Wei Wang and Baile Shi},
  title       = {On Reducing Classifier Granularity in Mining Concept-Drifting Data Streams},
  institution = {IBM T. J. Watson Research Center},
  year        = 2005,
  note        = {http://wis.cs.ucla.edu/\textasciitilde{}hxwang/publications/wangtech05.pdf}
}

@inproceedings{jka02,
  author    = {M. V. Joshi and V. Kumar and R. Agrawal},
  title     = {Predicting Rare Classes: Can Boosting Make Any Weak Learner Strong?},
  booktitle = kdd,
  year      = {2002}
}

@inproceedings{jak01,
  author    = {M. V. Joshi and R. Agrawal and V. Kumar},
  title     = {Mining Needles in a Haystack: Classifying Rare Classes via Two-Phase Rule Induction},
  booktitle = sigmod,
  year      = {2001}
}

% Venue uses the shared sdm macro so it renders like the other SDM entries.
@inproceedings{confsdmCormodeM05,
  author    = {Graham Cormode and S. Muthukrishnan},
  title     = {Summarizing and Mining Skewed Data Streams},
  booktitle = sdm,
  year      = {2005},
  bibdate   = {2005-06-27},
  bibsource = {DBLP, http://dblp.uni-trier.de/db/conf/sdm/sdm2005.html#CormodeM05},
}

% Venue uses the shared sdm macro so it renders like the other SDM entries.
@inproceedings{confsdmAggarwalY05,
  author    = {Charu C. Aggarwal and Philip S. Yu},
  title     = {Online Analysis of Community Evolution in Data Streams},
  booktitle = sdm,
  year      = {2005},
  bibdate   = {2005-06-27},
  bibsource = {DBLP, http://dblp.uni-trier.de/db/conf/sdm/sdm2005.html#AggarwalY05},
}

% Venue uses the shared sdm macro so it renders like the other SDM entries.
@inproceedings{confsdmLinCWC05,
  author    = {Chih-Hsiang Lin and Ding-Ying Chiu and Yi-Hung Wu and Arbee L. P. Chen},
  title     = {Mining Frequent Itemsets from Data Streams with a Time-Sensitive Sliding Window},
  booktitle = sdm,
  year      = {2005},
  bibdate   = {2005-06-27},
  bibsource = {DBLP, http://dblp.uni-trier.de/db/conf/sdm/sdm2005.html#LinCWC05},
}

% Venue uses the shared sdm macro so it renders like the other SDM entries.
@inproceedings{confsdmAggarwal05,
  author    = {Charu C. Aggarwal},
  title     = {On Abnormality Detection in Spuriously Populated Data Streams},
  booktitle = sdm,
  year      = {2005},
  bibdate   = {2005-06-27},
  bibsource = {DBLP, http://dblp.uni-trier.de/db/conf/sdm/sdm2005.html#Aggarwal05},
}

% Venue uses the shared kdd macro so it renders like the other KDD entries.
@inproceedings{confkddGuhaH05,
  author    = {Sudipto Guha and Boulos Harb},
  title     = {Wavelet synopsis for data streams: minimizing non-{Euclidean} error},
  booktitle = kdd,
  pages     = {88--97},
  year      = {2005},
  bibdate   = {2005-09-26},
  bibsource = {DBLP, http://dblp.uni-trier.de/db/conf/kdd/kdd2005.html#GuhaH05},
  url       = {http://doi.acm.org/10.1145/1081884},
}

% The doi field holds the bare identifier; styles add the resolver prefix.
@inproceedings{proactive,
  author    = {Ying Yang and Xindong Wu and Xingquan Zhu},
  title     = {Combining proactive and reactive predictions for data streams},
  booktitle = kdd,
  year      = {2005},
  isbn      = {1-59593-135-X},
  pages     = {710--715},
  location  = {Chicago, Illinois, USA},
  doi       = {10.1145/1081870.1081961},
}



% "Markov" is braced so sentence-casing styles keep its capital.
@incollection{hmmtutorial,
  author    = {Lawrence R. Rabiner},
  title     = {A tutorial on hidden {Markov} models and selected applications in speech recognition},
  booktitle = {Readings in speech recognition},
  year      = {1990},
  isbn      = {1-55860-124-4},
  pages     = {267--296},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
}

@inproceedings{loadstar,
  author    = {Yun Chi and Philip S. Yu and Haixun Wang and Richard Muntz},
  title     = {Loadstar: A Load Shedding Scheme for Classifying Data Streams},
  booktitle = sdm,
  year      = {2005}
}
% Venue uses the shared vldb macro so it renders like the other VLDB entries.
@inproceedings{loadstardemo,
  author    = {Yun Chi and Haixun Wang and Philip S. Yu},
  title     = {Loadstar: Load Shedding in Data Stream Mining},
  booktitle = vldb,
  year      = {2005},
  pages     = {1303--1305},
}
@inproceedings{ensembleoverfitting,
  author    = {Haixun Wang and Jian Yin and Jian Pei and Philip S. Yu and Jeffrey Xu Yu},
  title     = {Suppressing Model Overfitting in Mining Concept-Drifting Data Streams},
  booktitle = kdd,
  year      = {2006}
}




@phdthesis{ml,
  author = {Philip Chan},
  title  = {An Extensible Meta-Learning Approach for Scalable and Accurate Inductive Learning},
  school = {Columbia University},
  url    = {citeseer.ist.psu.edu/article/chan96extensible.html},
  year   = {1996}
}

@inproceedings{qbc,
  author    = {H. S. Seung and Manfred Opper and Haim Sompolinsky},
  title     = {Query by Committee},
  booktitle = {Computational Learning Theory},
  pages     = {287--294},
  year      = {1992},
  url       = {citeseer.ist.psu.edu/seung92query.html}
}

@article{highperiod,
  author  = {Jiong Yang and Wei Wang and Philip Yu},
  title   = {Discovering high order periodic patterns},
  journal = {Knowledge and Information Systems Journal (KAIS)},
  year    = 2004,
  volume  = 6,
  number  = 3,
  pages   = {243--268}
}





@inproceedings{kolter03dwm,
  author    = {J. Z. Kolter and M. A. Maloof},
  title     = {Dynamic weighted majority: A new ensemble method for tracking concept drift},
  booktitle = icdm,
  year      = {2003}
}

% This is a technical report, so the report number and the issuing
% department live in their own fields rather than in booktitle.
@techreport{stanley03committee,
  author      = {K. O. Stanley},
  title       = {Learning concept drift with a committee of decision trees},
  institution = {Dept. of Computer Sci., Uni. of Texas at Austin, USA},
  number      = {AI-03-302},
  year        = 2003
}

% This is a technical report, so the report number and the issuing
% department live in their own fields rather than in booktitle.
@techreport{tsymbal04conceptdrift,
  author      = {A. Tsymbal},
  title       = {The problem of concept drift: definitions and related work},
  institution = {Dept. of Computer Sci., Trinity College Dublin, Ireland},
  number      = {TCD-CS-2004-15},
  year        = 2004
}

% Published in the journal Machine Learning, so this is an article.
% NOTE(review): volume, number and pages were added from memory; confirm
% against the publisher record.
@article{widmer96conceptdrift,
  author  = {G. Widmer and M. Kubat},
  title   = {Learning in the presence of concept drift and hidden contexts},
  journal = {Machine Learning},
  volume  = 23,
  number  = 1,
  pages   = {69--101},
  year    = 1996
}

% Same paper as charu-framework03; both keys are kept so existing citations resolve.
@inproceedings{aggarwal03clustering,
  author    = {C. C. Aggarwal and J. Han and J. Wang and P. S. Yu},
  title     = {A framework for clustering evolving data streams},
  booktitle = vldb,
  year      = {2003}
}

@inproceedings{jinggao,
  author    = {Jing Gao and Wei Fan and Jiawei Han and Philip S. Yu},
  title     = {A General Framework for Mining Concept-Drifting Data Streams with Skewed Distributions},
  booktitle = sdm,
  year      = {2007}
}
% Same paper as fankdd04; both keys are kept so existing citations resolve.
% The title sits on one line: a line break after the hyphen would be read as
% a space and print "concept- drifting".
@inproceedings{randomstream,
  author    = {Wei Fan},
  title     = {Systematic data selection to mine concept-drifting data streams},
  booktitle = kdd,
  year      = 2004
}
@misc{kddcup99,
  author       = {KDDCUP-1999},
  title        = {The Third International Knowledge Discovery and Data Mining Tools Competition},
  howpublished = {http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html},
  year         = {1999}
}


@inproceedings{shixi08,
  author    = {Shixi Chen and Haixun Wang and Shuigeng Zhou and Philip S. Yu},
  title     = {Stop Chasing Trends: Discovering High Order Models in Evolving Data},
  booktitle = icde,
  pages     = {923--932},
  year      = {2008},
}

% Same paper as aggarwal03clustering; both keys are kept so existing citations resolve.
@inproceedings{charu-framework03,
  author    = {Charu C. Aggarwal and Jiawei Han and Jianyong Wang and Philip S. Yu},
  title     = {A framework for clustering evolving data streams},
  booktitle = vldb,
  year      = {2003},
  pages     = {81--92}
}

% The doi field holds the bare identifier; styles add the resolver prefix.
@inproceedings{vlachos-sigmod04,
  author    = {Michail Vlachos and Christopher Meek and Zografoula Vagena and Dimitrios Gunopulos},
  title     = {Identifying similarities, periodicities and bursts for online search queries},
  booktitle = {SIGMOD '04: Proceedings of the 2004 ACM SIGMOD international conference on Management of data},
  year      = {2004},
  isbn      = {1-58113-859-8},
  pages     = {131--142},
  location  = {Paris, France},
  doi       = {10.1145/1007568.1007586},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
